1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 import java.nio.charset.*;
32 import java.nio.*;
33 import java.util.*;
34
/**
 * Compares two implementations of the same double-byte charset: an "old" one
 * loaded reflectively from a class named {@code <charset>_OLD} and the "new"
 * one returned by {@link Charset#forName(String)}.
 *
 * <p>For every name in {@link #csnames} the driver reports charset/coder
 * creation times, mapping differences in both directions, and relative
 * encode/decode timings for heap and direct buffers. All findings are written
 * to {@code System.out}; hard failures are raised as {@link RuntimeException}.
 */
public class TestIBMDB {

    /** Mutable holder through which {@code encode} and {@code decode} report a timing. */
    static class Time {
        /** Average duration of one coder pass, in microseconds. */
        long t;
    }

    /** How many times every encode/decode pass is repeated. */
    static int iteration = 200;

    /** Maximum number of input bytes shown when a decode failure is reported. */
    private static final int DUMP_BYTES = 4;

    /** Returns a buffer positioned at 0 whose remaining content is {@code bb}. */
    private static ByteBuffer sourceBytes(byte[] bb, boolean direct) {
        if (!direct) {
            return ByteBuffer.wrap(bb);
        }
        ByteBuffer bbf = ByteBuffer.allocateDirect(bb.length);
        bbf.put(bb).flip();
        return bbf;
    }

    /** Returns a buffer positioned at 0 whose remaining content is {@code cc}. */
    private static CharBuffer sourceChars(char[] cc, boolean direct) {
        if (!direct) {
            return CharBuffer.wrap(cc);
        }
        CharBuffer cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
        cbf.put(cc).flip();
        return cbf;
    }

    /** Returns an empty char buffer with room for {@code chars} chars. */
    private static CharBuffer charSink(int chars, boolean direct) {
        return direct
            ? ByteBuffer.allocateDirect(chars * 2).asCharBuffer()
            : CharBuffer.allocate(chars);
    }

    /** Returns an empty byte buffer with room for {@code bytes} bytes. */
    private static ByteBuffer byteSink(int bytes, boolean direct) {
        return direct
            ? ByteBuffer.allocateDirect(bytes)
            : ByteBuffer.allocate(bytes);
    }

    /**
     * Decodes {@code bb} with a fresh decoder of {@code cs}, {@link #iteration}
     * times, and returns the chars produced by the last pass.
     *
     * @param bb         bytes to decode
     * @param cs         charset supplying the decoder
     * @param testDirect {@code true} to use direct buffers, {@code false} for heap buffers
     * @param t          receives the average time per pass in microseconds
     * @return the decoded chars
     * @throws RuntimeException if the last pass does not end in
     *         {@link CoderResult#UNDERFLOW}; a diagnostic line is printed first
     */
    static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
        throws Exception {
        String csn = cs.name();
        CharsetDecoder dec = cs.newDecoder();
        ByteBuffer bbf = sourceBytes(bb, testDirect);
        // One output char per input byte is the capacity the test has always used.
        CharBuffer cbf = charSink(bb.length, testDirect);

        CoderResult cr = null;
        long t1 = System.nanoTime() / 1000;
        for (int i = 0; i < iteration; i++) {
            bbf.rewind();
            cbf.clear();
            dec.reset();
            cr = dec.decode(bbf, cbf, true);
        }
        long t2 = System.nanoTime() / 1000;
        t.t = (t2 - t1) / iteration;

        if (cr != CoderResult.UNDERFLOW) {
            System.out.println("DEC-----------------");
            int pos = bbf.position();
            // Show up to DUMP_BYTES bytes from the failure position, but never
            // read past the end of the input: a failure in the last few bytes
            // must surface as the decoding error below, not as an index error.
            StringBuilder hex = new StringBuilder();
            int end = Math.min(pos + DUMP_BYTES, bb.length);
            for (int i = pos; i < end; i++) {
                if (i > pos) {
                    hex.append(',');
                }
                hex.append(Integer.toHexString(bb[i] & 0xff));
            }
            System.out.printf(" cr=%s, bbf.pos=%d, bb[pos]=%s%n",
                              cr.toString(), pos, hex);
            throw new RuntimeException("Decoding err: " + csn);
        }
        char[] cc = new char[cbf.position()];
        cbf.flip();
        cbf.get(cc);
        return cc;
    }

    /**
     * Decodes {@code bb} {@link #iteration} times like {@link #decode} but
     * returns the coder result of the last pass instead of the decoded chars,
     * and never throws on a non-underflow result.
     *
     * @param bb         bytes to decode
     * @param cs         charset supplying the decoder
     * @param testDirect {@code true} to use direct buffers
     * @return result of the last {@code decode} call
     */
    static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
        throws Exception {
        CharsetDecoder dec = cs.newDecoder();
        ByteBuffer bbf = sourceBytes(bb, testDirect);
        CharBuffer cbf = charSink(bb.length, testDirect);

        CoderResult cr = null;
        for (int i = 0; i < iteration; i++) {
            bbf.rewind();
            cbf.clear();
            dec.reset();
            cr = dec.decode(bbf, cbf, true);
        }
        return cr;
    }

    /**
     * Encodes {@code cc} with a fresh encoder of {@code cs}, {@link #iteration}
     * times, and returns the bytes produced by the last pass.
     *
     * @param cc         chars to encode
     * @param cs         charset supplying the encoder
     * @param testDirect {@code true} to use direct buffers, {@code false} for heap buffers
     * @param t          receives the average time per pass in microseconds
     * @return the encoded bytes
     * @throws RuntimeException if the last pass does not end in
     *         {@link CoderResult#UNDERFLOW}; a diagnostic line is printed first
     */
    static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
        throws Exception {
        CharsetEncoder enc = cs.newEncoder();
        String csn = cs.name();
        // Four output bytes per input char is the capacity the test has always used.
        ByteBuffer bbf = byteSink(cc.length * 4, testDirect);
        CharBuffer cbf = sourceChars(cc, testDirect);

        CoderResult cr = null;
        long t1 = System.nanoTime() / 1000;
        for (int i = 0; i < iteration; i++) {
            cbf.rewind();
            bbf.clear();
            enc.reset();
            cr = enc.encode(cbf, bbf, true);
        }
        long t2 = System.nanoTime() / 1000;
        t.t = (t2 - t1) / iteration;

        if (cr != CoderResult.UNDERFLOW) {
            System.out.println("ENC-----------------");
            int pos = cbf.position();
            System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n",
                              cr.toString(), pos, cc[pos] & 0xffff);
            throw new RuntimeException("Encoding err: " + csn);
        }
        byte[] bb = new byte[bbf.position()];
        bbf.flip();
        bbf.get(bb);
        return bb;
    }

    /**
     * Encodes {@code cc} {@link #iteration} times like {@link #encode} but
     * returns the coder result of the last pass instead of the encoded bytes,
     * and never throws on a non-underflow result.
     *
     * @param cc         chars to encode
     * @param cs         charset supplying the encoder
     * @param testDirect {@code true} to use direct buffers
     * @return result of the last {@code encode} call
     */
    static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
        throws Exception {
        CharsetEncoder enc = cs.newEncoder();
        ByteBuffer bbf = byteSink(cc.length * 4, testDirect);
        CharBuffer cbf = sourceChars(cc, testDirect);

        CoderResult cr = null;
        for (int i = 0; i < iteration; i++) {
            cbf.rewind();
            bbf.clear();
            enc.reset();
            cr = enc.encode(cbf, bbf, true);
        }
        return cr;
    }

    /**
     * Prints one mapping line for {@code c}: its encoded bytes in hex (no
     * separators, no zero padding), the char itself in hex, and the first char
     * obtained by decoding those bytes again with the same charset.
     */
    static void printEntry(char c, Charset cs) {
        byte[] bb = String.valueOf(c).getBytes(cs);
        for (byte b : bb) {
            System.out.printf("%x", b & 0xff);
        }
        System.out.printf(" %x", c & 0xffff);
        String s2 = new String(bb, cs);
        System.out.printf(" %x%n", s2.charAt(0) & 0xffff);
    }

    /**
     * Walks the chars U+0000..U+FFFE and reports every char for which the two
     * charsets disagree: encodable by only one of them, encoded to different
     * bytes, or encoded identically but decoded back differently.
     *
     * @param oldCS reference charset
     * @param newCS charset under test
     * @return all chars that both charsets can encode, in ascending order
     */
    static char[] checkEncoding(Charset oldCS, Charset newCS)
        throws Exception {
        System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name());
        CharsetEncoder encOLD = oldCS.newEncoder();
        CharsetEncoder encNew = newCS.newEncoder();
        char[] cc = new char[0x10000];
        int pos = 0;
        boolean is970 = "x-IBM970-Old".equals(oldCS.name());

        // The bound is < 0xffff because a char counter can never exceed 0xffff,
        // so a <= test would loop forever.
        for (char c = 0; c < 0xffff; c++) {
            boolean canOld = encOLD.canEncode(c);
            boolean canNew = encNew.canEncode(c);

            // U+2299 is deliberately skipped for the old IBM970 charset.
            if (is970 && c == 0x2299) {
                continue;
            }

            if (canOld != canNew) {
                if (canNew) {
                    System.out.printf(" NEW(only): ");
                    printEntry(c, newCS);
                } else {
                    if (is970) {
                        byte[] bb = String.valueOf(c).getBytes(oldCS);
                        // Old IBM970 entries that encode to A2 C1 are not reported.
                        if (bb.length == 2 && bb[0] == (byte) 0xa2 && bb[1] == (byte) 0xc1) {
                            continue;
                        }
                    }
                    System.out.printf(" OLD(only): ");
                    printEntry(c, oldCS);
                }
            } else if (canNew) {
                byte[] bbNew = String.valueOf(c).getBytes(newCS);
                byte[] bbOld = String.valueOf(c).getBytes(oldCS);
                if (!Arrays.equals(bbNew, bbOld)) {
                    System.out.printf(" c->b NEW: ");
                    printEntry(c, newCS);
                    System.out.printf(" c->b OLD: ");
                    printEntry(c, oldCS);
                } else {
                    String sNew = new String(bbNew, newCS);
                    String sOld = new String(bbOld, oldCS);
                    if (!sNew.equals(sOld)) {
                        System.out.printf(" b2c NEW (c=%x):", c & 0xffff);
                        printEntry(sNew.charAt(0), newCS);
                        System.out.printf(" b2c OLD:");
                        printEntry(sOld.charAt(0), oldCS);
                    }
                }
            }
            if (canNew & canOld) {
                cc[pos++] = c;
            }
        }
        return Arrays.copyOf(cc, pos);
    }

    /**
     * Decodes every single byte and every two-byte combination in
     * 0x40..0xFE x 0x40..0xFE with both charsets and prints the inputs whose
     * results differ. When the old charset's name starts with {@code x-IBM93}
     * the two bytes are wrapped as {@code 0E b1 b2 0F}.
     *
     * @param oldCS reference charset
     * @param newCS charset under test
     */
    static void checkDecoding(Charset oldCS, Charset newCS)
        throws Exception
    {
        System.out.printf("Decoding <%s> <%s>...%n", oldCS.name(), newCS.name());
        boolean isEBCDIC = oldCS.name().startsWith("x-IBM93");

        byte[] bb = new byte[1];
        System.out.printf(" trying SB...%n");
        for (int b = 0; b < 0x100; b++) {
            bb[0] = (byte) b;
            String sOld = new String(bb, oldCS);
            String sNew = new String(bb, newCS);
            if (!sOld.equals(sNew)) {
                System.out.printf(" b=%x: %x/%d(old) %x/%d(new)%n",
                                  b & 0xff,
                                  sOld.charAt(0) & 0xffff, sOld.length(),
                                  sNew.charAt(0) & 0xffff, sNew.length());
            }
        }

        System.out.printf(" trying DB...%n");
        bb = new byte[isEBCDIC ? 4 : 2];
        final int bMin = 0x40;
        final int bMax = 0xfe;
        for (int b1 = bMin; b1 <= bMax; b1++) {
            if (!isEBCDIC) {
                // Probe the lead byte first. Only bb[0] is set here; bb[1] still
                // holds whatever the previous pass left there (0 on the first pass).
                bb[0] = (byte) b1;
                String sOld = new String(bb, oldCS);
                String sNew = new String(bb, newCS);
                if (!sOld.equals(sNew)) {
                    if (sOld.length() != 2 && sOld.charAt(0) != 0) {
                        System.out.printf(" b1=%x: %x/%d(old) %x/%d(new)%n",
                                          b1 & 0xff,
                                          sOld.charAt(0) & 0xffff, sOld.length(),
                                          sNew.charAt(0) & 0xffff, sNew.length());
                        continue;
                    }
                }
            }
            for (int b2 = bMin; b2 <= bMax; b2++) {
                if (isEBCDIC) {
                    bb[0] = 0x0e;
                    bb[1] = (byte) b1;
                    bb[2] = (byte) b2;
                    bb[3] = 0x0f;
                } else {
                    bb[0] = (byte) b1;
                    bb[1] = (byte) b2;
                }
                String sOld = new String(bb, oldCS);
                String sNew = new String(bb, newCS);

                if (sOld.charAt(0) != sNew.charAt(0)) {
                    // Old yielding U+0000 where new yields U+FFFD is not reported.
                    if (sOld.charAt(0) == 0 && sNew.charAt(0) == 0xfffd) {
                        continue;
                    }
                    System.out.printf(" bb=<%x,%x> c(old)=%x, c(new)=%x%n",
                                      b1, b2, sOld.charAt(0) & 0xffff, sNew.charAt(0) & 0xffff);
                }
            }
        }
    }

    /**
     * Prints, in microseconds, how long it takes to look up charset
     * {@code csn} and to create one decoder and one encoder from it.
     * {@code Big5} is looked up first, before any timing starts.
     */
    static void checkInit(String csn) throws Exception {
        System.out.printf("Check init <%s>...%n", csn);
        Charset.forName("Big5");
        long t1 = System.nanoTime() / 1000;
        Charset cs = Charset.forName(csn);
        long t2 = System.nanoTime() / 1000;
        System.out.printf(" charset :%d%n", t2 - t1);

        t1 = System.nanoTime() / 1000;
        cs.newDecoder();
        t2 = System.nanoTime() / 1000;
        System.out.printf(" new Decoder :%d%n", t2 - t1);

        t1 = System.nanoTime() / 1000;
        cs.newEncoder();
        t2 = System.nanoTime() / 1000;
        System.out.printf(" new Encoder :%d%n", t2 - t1);
    }

    /** Prints one timing line: both averages and the ratio second/first. */
    private static void printRatio(String label, String csn1, String csn2,
                                   Time t1, Time t2) {
        System.out.printf(" %s TimeRatio %s/%s: %d,%d :%f%n",
                          label, csn2, csn1,
                          t2.t, t1.t,
                          (double) (t2.t) / (t1.t));
    }

    /**
     * Encodes {@code cc} and decodes the result with both charsets, first with
     * heap buffers and then with direct buffers, printing the timing ratio
     * {@code cs2/cs1} for each step and a "failed" line whenever the two
     * charsets produce different output.
     *
     * @param cs1 first charset (denominator of the printed ratio)
     * @param cs2 second charset (numerator of the printed ratio)
     * @param cc  chars that both charsets are expected to encode
     */
    static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception {
        System.gc();
        Thread.sleep(1000);
        System.gc();

        String csn1 = cs1.name();
        String csn2 = cs2.name();
        System.out.printf("Diff <%s> <%s>...%n", csn1, csn2);

        Time t1 = new Time();
        Time t2 = new Time();

        byte[] bb1 = encode(cc, cs1, false, t1);
        byte[] bb2 = encode(cc, cs2, false, t2);
        printRatio("Encoding", csn1, csn2, t1, t2);
        if (!Arrays.equals(bb1, bb2)) {
            System.out.printf(" encoding failed%n");
        }

        char[] cc2 = decode(bb1, cs2, false, t2);
        char[] cc1 = decode(bb1, cs1, false, t1);
        printRatio("Decoding", csn1, csn2, t1, t2);
        if (!Arrays.equals(cc1, cc2)) {
            System.out.printf(" decoding failed%n");
        }

        bb1 = encode(cc, cs1, true, t1);
        bb2 = encode(cc, cs2, true, t2);
        printRatio("Encoding(dir)", csn1, csn2, t1, t2);
        if (!Arrays.equals(bb1, bb2)) {
            System.out.printf(" encoding (direct) failed%n");
        }

        cc1 = decode(bb1, cs1, true, t1);
        cc2 = decode(bb1, cs2, true, t2);
        printRatio("Decoding(dir)", csn1, csn2, t1, t2);
        if (!Arrays.equals(cc1, cc2)) {
            System.out.printf(" decoding (direct) failed%n");
        }
    }

    /** Formats {@code bytes} as space-separated, unpadded lowercase hex values. */
    private static String toHex(byte[] bytes) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                sb.append(' ');
            }
            sb.append(Integer.toString(bytes[i] & 0xff, 16));
        }
        return sb.toString();
    }

    /**
     * Verifies that each input is rejected as malformed with the expected
     * length, using both heap and direct buffers.
     *
     * @param cs        charset whose decoder is checked
     * @param malformed one row per case: element 0 is the expected malformed
     *                  length, the remaining elements are the bytes to decode
     * @throws RuntimeException if any case is not reported as malformed or is
     *         reported with a different length; every failure is printed first
     */
    static void checkMalformed(Charset cs, byte[][] malformed)
        throws Exception
    {
        boolean failed = false;
        String csn = cs.name();
        System.out.printf("Check malformed <%s>...%n", csn);
        for (boolean direct : new boolean[] {false, true}) {
            for (byte[] bins : malformed) {
                int mlen = bins[0];
                byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
                CoderResult cr = decodeCR(bin, cs, direct);
                if (!cr.isMalformed()) {
                    System.out.printf(" FAIL(direct=%b): [%s] not malformed. -->cr=%s\n",
                                      direct, toHex(bin), cr.toString());
                    failed = true;
                } else if (cr.length() != mlen) {
                    System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n",
                                      direct, toHex(bin), cr.length());
                    failed = true;
                }
            }
        }
        if (failed) {
            throw new RuntimeException("Check malformed failed " + csn);
        }
    }

    /**
     * Runs one decode call with the buffer window described by {@code flow}
     * and checks the result and both final positions.
     *
     * @param dec    decoder to use; it is reset before decoding
     * @param bytes  input bytes, copied into the input buffer at the input offset
     * @param direct {@code true} to use direct buffers
     * @param flow   {@code {inPos, inLen, outPos, outLen, expectedInPos,
     *               expectedOutPos, expectedResult}} where an expected result
     *               of 0 means UNDERFLOW and anything else means OVERFLOW
     * @return {@code true} if the outcome matched; otherwise the mismatch is
     *         printed and {@code false} is returned
     */
    static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
        int inPos = flow[0];
        int inLen = flow[1];
        int outPos = flow[2];
        int outLen = flow[3];
        int expedInPos = flow[4];
        int expedOutPos = flow[5];
        CoderResult expedCR = (flow[6] == 0) ? CoderResult.UNDERFLOW
                                             : CoderResult.OVERFLOW;
        ByteBuffer bbf;
        CharBuffer cbf;
        if (direct) {
            bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
            cbf = ByteBuffer.allocateDirect((outPos + outLen) * 2).asCharBuffer();
        } else {
            bbf = ByteBuffer.allocate(inPos + bytes.length);
            cbf = CharBuffer.allocate(outPos + outLen);
        }
        bbf.position(inPos);
        bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
        cbf.position(outPos);
        dec.reset();
        CoderResult cr = dec.decode(bbf, cbf, false);
        if (cr != expedCR ||
            bbf.position() != expedInPos ||
            cbf.position() != expedOutPos) {
            System.out.printf("Expected(direct=%5b): [", direct);
            for (int i : flow) {
                System.out.print(" " + i);
            }
            System.out.println("] CR=" + cr +
                               ", inPos=" + bbf.position() +
                               ", outPos=" + cbf.position());
            return false;
        }
        return true;
    }

    /**
     * Checks underflow/overflow reporting of {@code cs}'s decoder over a table
     * of input/output window sizes, at every input and output offset below 20,
     * with heap and direct buffers.
     *
     * <p>The input bytes are always the EUC_TW encoding of a fixed string,
     * whatever {@code cs} is. NOTE(review): the expected positions therefore
     * presumably only hold for a decoder compatible with EUC_TW; confirm
     * against callers (none are visible in this file).
     *
     * @throws RuntimeException if any case fails; every failure is printed first
     */
    static void checkUnderOverflow(Charset cs) throws Exception {
        String csn = cs.name();
        System.out.printf("Check under/overflow <%s>...%n", csn);
        CharsetDecoder dec = cs.newDecoder();
        boolean failed = false;

        byte[] bytes = "\u007f\u3000\u4e42\u4e28\ud840\udc55".getBytes("EUC_TW");
        int inlen = bytes.length;

        int MAXOFF = 20;
        for (int inoff = 0; inoff < MAXOFF; inoff++) {
            for (int outoff = 0; outoff < MAXOFF; outoff++) {
                // Row layout: see the flow parameter of check().
                int[][] Flows = {
                    // Whole input available, output space growing from 1 to 6 chars.
                    {inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1},
                    {inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1},
                    {inoff, inlen, outoff, 3, inoff + 7, outoff + 3, 1},
                    {inoff, inlen, outoff, 4, inoff + 11, outoff + 4, 1},
                    {inoff, inlen, outoff, 5, inoff + 11, outoff + 4, 1},
                    {inoff, inlen, outoff, 6, inoff + 15, outoff + 6, 0},

                    // Ample output space (6 chars), input truncated at various lengths.
                    {inoff, 1, outoff, 6, inoff + 1, outoff + 1, 0},
                    {inoff, 2, outoff, 6, inoff + 1, outoff + 1, 0},
                    {inoff, 3, outoff, 6, inoff + 3, outoff + 2, 0},
                    {inoff, 4, outoff, 6, inoff + 3, outoff + 2, 0},
                    {inoff, 5, outoff, 6, inoff + 3, outoff + 2, 0},
                    {inoff, 8, outoff, 6, inoff + 7, outoff + 3, 0},
                    {inoff, 9, outoff, 6, inoff + 7, outoff + 3, 0},
                    {inoff, 10, outoff, 6, inoff + 7, outoff + 3, 0},
                    {inoff, 11, outoff, 6, inoff + 11, outoff + 4, 0},
                    {inoff, 12, outoff, 6, inoff + 11, outoff + 4, 0},
                    {inoff, 15, outoff, 6, inoff + 15, outoff + 6, 0},

                    // Both input and output limited.
                    {inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0},
                    {inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1},
                    {inoff, 3, outoff, 2, inoff + 3, outoff + 2, 0},
                };
                for (boolean direct : new boolean[] {false, true}) {
                    for (int[] flow : Flows) {
                        if (!check(dec, bytes, direct, flow)) {
                            failed = true;
                        }
                    }
                }
            }
        }
        if (failed) {
            throw new RuntimeException("Check under/overflow failed " + csn);
        }
    }

    /** Charset names to test; each must also have a class named {@code <name>_OLD}. */
    static String[] csnames = new String[] {
        "IBM930",
        "IBM933",
        "IBM935",
        "IBM937",
        "IBM939",
        "IBM942",
        "IBM943",
        "IBM948",
        "IBM949",
        "IBM950",
        "IBM970",
        "IBM942C",
        "IBM943C",
        "IBM949C",
        "IBM1381",
        "IBM1383",

        "EUC_CN",
        "EUC_KR",
        "GBK",
        "Johab",
        "MS932",
        "MS936",
        "MS949",
        "MS950",
    };

    /**
     * Runs the init, encoding, decoding and timing comparisons for every name
     * in {@link #csnames}.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws Exception {
        for (String csname : csnames) {
            System.out.printf("-----------------------------------%n");
            String oldname = csname + "_OLD";
            checkInit(csname);
            Charset csOld =
                (Charset) Class.forName(oldname).getDeclaredConstructor().newInstance();
            Charset csNew = Charset.forName(csname);
            char[] cc = checkEncoding(csOld, csNew);
            checkDecoding(csOld, csNew);
            compare(csNew, csOld, cc);

            // NOTE(review): no entry of csnames starts with "x-", so neither
            // branch below is ever taken and checkMalformed never runs from
            // here. The tests presumably were meant to look at csNew.name();
            // left unchanged until the malformed expectations can be verified
            // against the real charsets.
            if (csname.startsWith("x-IBM93")) {
                checkMalformed(csNew, new byte[][] {
                    {1, 0x26, 0x0f, 0x27},
                    {1, 0x0e, 0x41, 0x41, 0xe},
                    {2, 0x0e, 0x40, 0x41, 0xe},
                });
            } else if (csname.equals("x-IBM970") ||
                       csname.equals("x-IBM1383")) {
                checkMalformed(csNew, new byte[][] {
                    {1, 0x26, (byte) 0x8f, 0x27},
                    {1, (byte) 0xa1, (byte) 0xa1, (byte) 0x8e, 0x51},
                });
            }
        }
    }
}